/-
Copyright (c) 2020 Microsoft Corporation. All rights reserved.
Released under Apache 2.0 license as described in the file LICENSE.
Author: Leonardo de Moura
-/
module

prelude
import all Init.Data.ByteArray.Basic
public import Init.Data.String.Basic
import all Init.Data.String.Basic
public import Init.Data.String.Substring
public import Init.Data.String.Modify
import Init.Data.String.Search

public section

namespace String

@[deprecated ByteArray.utf8DecodeChar? (since := "2025-10-01")]
abbrev utf8DecodeChar? (a : ByteArray) (i : Nat) : Option Char :=
  a.utf8DecodeChar? i

/--
Checks whether an array of bytes is a valid UTF-8 encoding of a string.
-/
@[deprecated ByteArray.validateUTF8 (since := "2025-10-01")]
abbrev validateUTF8 (a : ByteArray) : Bool :=
  a.validateUTF8

private def findLeadingSpacesSize (s : String) : Nat :=
  let it := s.startPos
  let it := it.find? (· == '\n') |>.bind String.Pos.next?
  match it with
  | some it => consumeSpaces it 0 s.length
  | none => 0
where
  consumeSpaces {s : String} (it : s.Pos) (curr min : Nat) : Nat :=
    if h : it.IsAtEnd then min
    else if it.get h == ' ' || it.get h == '\t' then consumeSpaces (it.next h) (curr + 1) min
    else if it.get h == '\n' then findNextLine (it.next h) min
    else findNextLine (it.next h) (Nat.min curr min)
  termination_by it
  findNextLine {s : String} (it : s.Pos) (min : Nat) : Nat :=
    if h : it.IsAtEnd then min
    else if it.get h == '\n' then consumeSpaces (it.next h) 0 min
    else findNextLine (it.next h) min
  termination_by it

private def removeNumLeadingSpaces (n : Nat) (s : String) : String :=
  consumeSpaces n s.startPos ""
where
  consumeSpaces (n : Nat) {s : String} (it : s.Pos) (r : String) : String :=
     match n with
     | 0 => saveLine it r
     | n+1 =>
       if h : it.IsAtEnd then r
       else if it.get h == ' ' || it.get h == '\t' then consumeSpaces n (it.next h) r
       else saveLine it r
  termination_by (it, 1)
  saveLine {s : String} (it : s.Pos) (r : String) : String :=
    if h : it.IsAtEnd then r
    else if it.get h  == '\n' then consumeSpaces n (it.next h) (r.push '\n')
    else saveLine (it.next h) (r.push (it.get h))
  termination_by (it, 0)

/--
Consistently de-indents the lines in a string, removing the same amount of leading whitespace from
each line such that the least-indented line has no leading whitespace.

The number of leading whitespace characters to remove from each line is determined by counting the
number of leading space (`' '`) and tab (`'\t'`) characters on lines after the first line that also
contain non-whitespace characters. No distinction is made between tab and space characters; both
count equally.

The least number of leading whitespace characters found is then removed from the beginning of each
line. The first line's leading whitespace is not counted when determining how far to de-indent the
string, but leading whitespace is removed from it.

Examples:
* `"Here:\n  fun x =>\n    x + 1".removeLeadingSpaces = "Here:\nfun x =>\n  x + 1"`
* `"Here:\n\t\tfun x =>\n\t  \tx + 1".removeLeadingSpaces = "Here:\nfun x =>\n \tx + 1"`
* `"Here:\n\t\tfun x =>\n \n\t  \tx + 1".removeLeadingSpaces = "Here:\nfun x =>\n\n \tx + 1"`
-/
def removeLeadingSpaces (s : String) : String :=
  let n := findLeadingSpacesSize s
  if n == 0 then s else removeNumLeadingSpaces n s

/--
Replaces each `\r\n` with `\n` to normalize line endings, but does not validate that there are no
isolated `\r` characters.

This is an optimized version of `String.replace text "\r\n" "\n"`.
-/
def crlfToLf (text : String) : String :=
  go "" 0 0
where
  go (acc : String) (accStop pos : String.Pos.Raw) : String :=
    if h : pos.atEnd text then
      -- note: if accStop = 0 then acc is empty
      if accStop = 0 then text else acc ++ accStop.extract text pos
    else
      let c := pos.get' text h
      let pos' := pos.next' text h
      if h' : ¬ pos'.atEnd text ∧ c == '\r' ∧ pos'.get text == '\n' then
        let acc := acc ++ accStop.extract text pos
        go acc pos' (pos'.next' text h'.1)
      else
        go acc accStop pos'
  termination_by text.utf8ByteSize - pos.byteIdx
  decreasing_by
    decreasing_with
      change text.utf8ByteSize - ((pos.next text).next text).byteIdx < text.utf8ByteSize - pos.byteIdx
      have k := Nat.gt_of_not_le <| mt decide_eq_true h
      exact Nat.sub_lt_sub_left k (Nat.lt_trans (String.Pos.Raw.lt_next text pos) (String.Pos.Raw.lt_next _ _))
    decreasing_with
      change text.utf8ByteSize - (pos.next text).byteIdx < text.utf8ByteSize - pos.byteIdx
      have k := Nat.gt_of_not_le <| mt decide_eq_true h
      exact Nat.sub_lt_sub_left k (String.Pos.Raw.lt_next _ _)

end String
